package com.peng.sparktest.sparkcore

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

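/**
 * Computes PV (page views) and UV (unique visitors) per site from a
 * tab-separated access log and prints the top 5 sites for each metric.
 */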
object SparkApiTest02_PV_UV_Count {
  def main(args: Array[String]): Unit = {

    // Run locally for the demo and silence Spark's INFO logging so the
    // printed results stay readable.
    val conf: SparkConf = new SparkConf().setAppName("pv_uv_job").setMaster("local")
    val context = new SparkContext(conf)
    context.setLogLevel("ERROR")

    println("================= PV: page views per site ========================")

    // Sample record (tab-separated): IP, province, date, timestamp, session id, URL, action
    //42.134.182.213	山东	2018-11-12	1542011088714	3445974150374613566	www.jd.com	Buy
    val data: RDD[String] = context.textFile("test_file/pvuvdata", 5)

    // PV: every record is one view, so count records per URL (column 5).
    // Swapping to (count, url) lets sortByKey order by the count; swap back before printing.
    data.map(line => (line.split("\t")(5), 1))
      .reduceByKey(_ + _)
      .map(_.swap)
      .sortByKey(ascending = false)
      .take(5)
      .map(_.swap)
      .foreach(println)
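
    // The swap/sortByKey/swap dance can also be avoided with RDD.top and a
    // custom Ordering, which skips the full sort. A sketch of the same top-5 PV:
    data.map(line => (line.split("\t")(5), 1))
      .reduceByKey(_ + _)
      .top(5)(Ordering.by[(String, Int), Int](_._2))
      .foreach(println)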


    println("================= UV: unique visitors per site ========================")

    // UV: one visit is one distinct (IP, URL) pair, so deduplicate the pairs
    // first and then count the remaining pairs per URL.
    data.map(line => {
      val fields: Array[String] = line.split("\t")
      (fields(0), fields(5)) // (IP, URL)
    }).distinct()
      .map(item => (item._2, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .take(5)
      .foreach(println)
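
    // On large logs the distinct() above forces a full shuffle of every
    // (IP, URL) pair. countApproxDistinctByKey (HyperLogLog-based) trades
    // exactness for speed; a sketch assuming a 1% relative error is acceptable:
    data.map(line => {
      val fields = line.split("\t")
      (fields(5), fields(0)) // (URL, IP)
    }).countApproxDistinctByKey(0.01)
      .sortBy(_._2, ascending = false)
      .take(5)
      .foreach(println)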


    // Keep the application alive so the Spark Web UI (http://localhost:4040)
    // stays reachable for inspection; sleeping avoids the busy-wait of an
    // empty while (true) loop.
    Thread.sleep(Long.MaxValue)
  }

}
